{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Defining urban census tracts\n",
    "\n",
    "This script identifies census tracts that are urbanized and within a core-based statistical area (as of 2020). It uses data including Census TIGER/Line boundaries for census tracts, urban areas, and core-based statistical areas, as well as locale classifications from the National Center for Education Statistics."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import arcgis\n",
    "import arcpy\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#set up workspaces and check database\n",
    "##resolve the default geodatabase of the ArcGIS Pro project this notebook is open in\n",
    "current_project = arcpy.mp.ArcGISProject(\"CURRENT\")\n",
    "default_gdb = current_project.defaultGeodatabase\n",
    "\n",
    "##let geoprocessing tools replace outputs left over from an earlier run\n",
    "arcpy.env.overwriteOutput = True\n",
    "\n",
    "##show the path so the target database can be checked before anything is written\n",
    "print(default_gdb)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Identifying urban tracts on 2020 boundaries\n",
    "This script draws on boundary data from three sources. Download them and import them into your geodatabase before proceeding.\n",
    "\n",
    "1) National Historical GIS (NHGIS) 2020 census tract boundaries (https://www.nhgis.org/)\n",
    "\n",
    "2) Census TIGER/Line files for 2020 Urban Areas and Core-Based Statistical Areas (https://www2.census.gov/geo/tiger/TGRGDB20/tlgdb_2020_a_us_nationgeo.gdb.zip)\n",
    "\n",
    "3) National Center for Education Statistics 2020 Locale Classifications (https://nces.ed.gov/programs/edge/data/edge_locale20_nces_all_us.zip)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#defining inputs\n",
    "##feature classes expected in the default geodatabase\n",
    "tracts = os.path.join(default_gdb, \"US_tracts_2020\")  ##for data using 2010 tract boundaries, change to 2010 tracts\n",
    "cbsa = os.path.join(default_gdb, \"CBSA_2020\")\n",
    "urbanareas = os.path.join(default_gdb, \"urbanareas_2020\")\n",
    "locales = os.path.join(default_gdb, \"NCES_locales2020\")\n",
    "\n",
    "##stop early with a clear message if an input has not been imported yet\n",
    "missing_inputs = [path for path in (tracts, cbsa, urbanareas, locales) if not arcpy.Exists(path)]\n",
    "if missing_inputs:\n",
    "    raise FileNotFoundError(\"Input feature classes not found: \" + \", \".join(missing_inputs))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#select relevant tracts\n",
    "##tracts in urban areas \n",
    "arcpy.management.SelectLayerByLocation(\n",
    "    in_layer=tracts,\n",
    "    overlap_type=\"HAVE_THEIR_CENTER_IN\",\n",
    "    select_features=urbanareas,\n",
    "    search_distance=None,\n",
    "    selection_type=\"NEW_SELECTION\",\n",
    "    invert_spatial_relationship=\"NOT_INVERT\"\n",
    ")\n",
    "\n",
    "##and in a CBSA\n",
    "arcpy.management.SelectLayerByLocation(\n",
    "    in_layer=tracts\",\n",
    "    overlap_type=\"HAVE_THEIR_CENTER_IN\",\n",
    "    select_features=cbsa,\n",
    "    search_distance=None,\n",
    "    selection_type=\"SUBSET_SELECTION\",\n",
    "    invert_spatial_relationship=\"NOT_INVERT\"\n",
    ")\n",
    "\n",
    "## and not in Puerto Ricos\n",
    "arcpy.management.SelectLayerByAttribute(\n",
    "    in_layer_or_view=tracts,\n",
    "    selection_type=\"SUBSET_SELECTION\",\n",
    "    where_clause=\"STATEFP <> '72'\",\n",
    "    invert_where_clause=None\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#join tracts with CBSA\n",
    "arcpy.analysis.SpatialJoin(\n",
    "    target_features=tracts,\n",
    "    join_features=cbsa,\n",
    "    out_feature_class=default_gdb + \"\\\\tracts_byCBSA\",\n",
    "    join_operation=\"JOIN_ONE_TO_ONE\",\n",
    "    join_type=\"KEEP_ALL\",\n",
    "    field_mapping='GISJOIN \"GISJOIN\" true true false 14 Text 0 0,First,#,US_tract_2020,GISJOIN,0,13;GEOID \"GEOID\" true true false 11 Text 0 0,First,#,US_tract_2020,GEOID,0,10;STATEFP \"STATEFP\" true true false 2 Text 0 0,First,#,US_tract_2020,STATEFP,0,1;COUNTYFP \"COUNTYFP\" true true false 3 Text 0 0,First,#,US_tract_2020,COUNTYFP,0,2;TRACTCE \"TRACTCE\" true true false 6 Text 0 0,First,#,US_tract_2020,TRACTCE,0,5;CBSAFP \"CBSAFP\" true true false 5 Text 0 0,First,#,CBSA_2020,CBSAFP,0,4;CBSA_NAME \"CBSA_NAME\" true true false 100 Text 0 0,First,#,CBSA_2020,CBSA_NAME,0,99;CBSA_NAMELSAD \"CBSA_NAMELSAD\" true true false 100 Text 0 0,First,#,CBSA_2020,CBSA_NAMELSAD,0,99',\n",
    "    match_option=\"HAVE_THEIR_CENTER_IN\",\n",
    "    search_radius=None,\n",
    "    distance_field_name=\"\",\n",
    "    match_fields=None\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#join tracts with urban areas\n",
    "##adds the urban area code and names (UAFP, UA_NAME, UA_NAMELSAD) to the CBSA-joined tracts\n",
    "##the field mapping names its tract source tracts_byCBSA, the same dataset passed as target_features\n",
    "##NOTE(review): urban area fields are mapped from a source called urbanareas_2020 - confirm it resolves to the urbanareas input\n",
    "arcpy.analysis.SpatialJoin(\n",
    "    target_features=\"tracts_byCBSA\",\n",
    "    join_features=urbanareas,\n",
    "    out_feature_class=default_gdb + \"\\\\tracts_byCBSAUA\",\n",
    "    join_operation=\"JOIN_ONE_TO_ONE\",\n",
    "    join_type=\"KEEP_ALL\",\n",
    "    field_mapping='GISJOIN \"GISJOIN\" true true false 14 Text 0 0,First,#,tracts_byCBSA,GISJOIN,0,13;GEOID \"GEOID\" true true false 11 Text 0 0,First,#,tracts_byCBSA,GEOID,0,10;STATEFP \"STATEFP\" true true false 2 Text 0 0,First,#,tracts_byCBSA,STATEFP,0,1;COUNTYFP \"COUNTYFP\" true true false 3 Text 0 0,First,#,tracts_byCBSA,COUNTYFP,0,2;TRACTCE \"TRACTCE\" true true false 6 Text 0 0,First,#,tracts_byCBSA,TRACTCE,0,5;CBSAFP \"CBSAFP\" true true false 5 Text 0 0,First,#,tracts_byCBSA,CBSAFP,0,4;CBSA_NAME \"CBSA_NAME\" true true false 100 Text 0 0,First,#,tracts_byCBSA,CBSA_NAME,0,99;CBSA_NAMELSAD \"CBSA_NAMELSAD\" true true false 100 Text 0 0,First,#,tracts_byCBSA,CBSA_NAMELSAD,0,99;UAFP \"UAFP\" true true false 5 Text 0 0,First,#,urbanareas_2020,GEOID20,0,4;UA_NAME \"UA_NAME\" true true false 100 Text 0 0,First,#,urbanareas_2020,NAME20,0,99;UA_NAMELSAD \"UA_NAMELSAD\" true true false 100 Text 0 0,First,#,urbanareas_2020,NAMELSAD20,0,99',\n",
    "    match_option=\"HAVE_THEIR_CENTER_IN\",\n",
    "    search_radius=None,\n",
    "    distance_field_name=\"\",\n",
    "    match_fields=None\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#join tracts with locales\n",
    "##adds the NCES locale code and type (LOCALE, LocaleType) and writes the metrotracts_2020 output\n",
    "##the field mapping names its tract source tracts_byCBSAUA, the same dataset passed as target_features\n",
    "##NOTE(review): locale fields are mapped from a source called NCES_locales2020 - confirm it resolves to the locales input\n",
    "arcpy.analysis.SpatialJoin(\n",
    "    target_features=\"tracts_byCBSAUA\",\n",
    "    join_features=locales,\n",
    "    out_feature_class=default_gdb + \"\\\\metrotracts_2020\",\n",
    "    join_operation=\"JOIN_ONE_TO_ONE\",\n",
    "    join_type=\"KEEP_ALL\",\n",
    "    field_mapping='GISJOIN \"GISJOIN\" true true false 14 Text 0 0,First,#,tracts_byCBSAUA,GISJOIN,0,13;GEOID \"GEOID\" true true false 11 Text 0 0,First,#,tracts_byCBSAUA,GEOID,0,10;STATEFP \"STATEFP\" true true false 2 Text 0 0,First,#,tracts_byCBSAUA,STATEFP,0,1;COUNTYFP \"COUNTYFP\" true true false 3 Text 0 0,First,#,tracts_byCBSAUA,COUNTYFP,0,2;TRACTCE \"TRACTCE\" true true false 6 Text 0 0,First,#,tracts_byCBSAUA,TRACTCE,0,5;CBSAFP \"CBSAFP\" true true false 5 Text 0 0,First,#,tracts_byCBSAUA,CBSAFP,0,4;CBSA_NAME \"CBSA_NAME\" true true false 100 Text 0 0,First,#,tracts_byCBSAUA,CBSA_NAME,0,99;CBSA_NAMELSAD \"CBSA_NAMELSAD\" true true false 100 Text 0 0,First,#,tracts_byCBSAUA,CBSA_NAMELSAD,0,99;UAFP \"UAFP\" true true false 5 Text 0 0,First,#,tracts_byCBSAUA,UAFP,0,4;UA_NAME \"UA_NAME\" true true false 100 Text 0 0,First,#,tracts_byCBSAUA,UA_NAME,0,99;UA_NAMELSAD \"UA_NAMELSAD\" true true false 100 Text 0 0,First,#,tracts_byCBSAUA,UA_NAMELSAD,0,99;LOCALE \"LOCALE\" true true false 2 Text 0 0,First,#,NCES_locales2020,LOCALE,0,1;LocaleType \"LocaleType\" true true false 255 Text 0 0,First,#,NCES_locales2020,LocaleType,0,254',\n",
    "    match_option=\"HAVE_THEIR_CENTER_IN\",\n",
    "    search_radius=None,\n",
    "    distance_field_name=\"\",\n",
    "    match_fields=None\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#fix tracts that are missing locale data\n",
    "metrotracts = default_gdb + \"\\\\metrotracts_2020\"\n",
    "metrotracts_corrected = default_gdb + \"\\\\metrotracts_2020_corrected\"\n",
    "\n",
    "##selections are held by layers, so build a named layer over the joined tracts\n",
    "##instead of relying on a map layer that happens to be called metrotracts_2020\n",
    "missing_lyr = \"metrotracts_missing\"\n",
    "arcpy.management.MakeFeatureLayer(\n",
    "    in_features=metrotracts,\n",
    "    out_layer=missing_lyr\n",
    ")\n",
    "\n",
    "##select tracts with missing data\n",
    "missing_selection = arcpy.management.SelectLayerByAttribute(\n",
    "    in_layer_or_view=missing_lyr,\n",
    "    selection_type=\"NEW_SELECTION\",\n",
    "    where_clause=\"LocaleType IS NULL\",\n",
    "    invert_where_clause=None\n",
    ")\n",
    "##the tool reports the number of selected rows as its second output\n",
    "missing_count = int(missing_selection.getOutput(1))\n",
    "\n",
    "##only repair when something is selected: with no selection the join and the\n",
    "##delete below would act on every tract instead of just the incomplete ones\n",
    "if missing_count > 0:\n",
    "    ##join them to locales using a wider spatial relationship\n",
    "    ##the field mapping names the selected layer (metrotracts_missing) as the source of the tract fields\n",
    "    arcpy.analysis.SpatialJoin(\n",
    "        target_features=missing_lyr,\n",
    "        join_features=locales,\n",
    "        out_feature_class=metrotracts_corrected,\n",
    "        join_operation=\"JOIN_ONE_TO_ONE\",\n",
    "        join_type=\"KEEP_ALL\",\n",
    "        field_mapping='GISJOIN \"GISJOIN\" true true false 14 Text 0 0,First,#,metrotracts_missing,GISJOIN,0,13;GEOID \"GEOID\" true true false 11 Text 0 0,First,#,metrotracts_missing,GEOID,0,10;STATEFP \"STATEFP\" true true false 2 Text 0 0,First,#,metrotracts_missing,STATEFP,0,1;COUNTYFP \"COUNTYFP\" true true false 3 Text 0 0,First,#,metrotracts_missing,COUNTYFP,0,2;TRACTCE \"TRACTCE\" true true false 6 Text 0 0,First,#,metrotracts_missing,TRACTCE,0,5;CBSAFP \"CBSAFP\" true true false 5 Text 0 0,First,#,metrotracts_missing,CBSAFP,0,4;CBSA_NAME \"CBSA_NAME\" true true false 100 Text 0 0,First,#,metrotracts_missing,CBSA_NAME,0,99;CBSA_NAMELSAD \"CBSA_NAMELSAD\" true true false 100 Text 0 0,First,#,metrotracts_missing,CBSA_NAMELSAD,0,99;UAFP \"UAFP\" true true false 5 Text 0 0,First,#,metrotracts_missing,UAFP,0,4;UA_NAME \"UA_NAME\" true true false 100 Text 0 0,First,#,metrotracts_missing,UA_NAME,0,99;UA_NAMELSAD \"UA_NAMELSAD\" true true false 100 Text 0 0,First,#,metrotracts_missing,UA_NAMELSAD,0,99;LOCALE \"LOCALE\" true true false 2 Text 0 0,First,#,NCES_locales2020,LOCALE,0,1;LocaleType \"LocaleType\" true true false 255 Text 0 0,First,#,NCES_locales2020,LocaleType,0,254',\n",
    "        match_option=\"LARGEST_OVERLAP\",\n",
    "        search_radius=None,\n",
    "        distance_field_name=\"\",\n",
    "        match_fields=None\n",
    "    )\n",
    "\n",
    "    ##delete selected rows with missing data (the selection made above is still active on the layer)\n",
    "    arcpy.management.DeleteRows(\n",
    "        in_rows=missing_lyr\n",
    "    )\n",
    "\n",
    "    ##append corrected rows back into main layer\n",
    "    arcpy.management.Append(\n",
    "        inputs=metrotracts_corrected,\n",
    "        target=metrotracts,\n",
    "        schema_type=\"TEST\",\n",
    "        field_mapping=None,\n",
    "        subtype=\"\",\n",
    "        expression=\"\",\n",
    "        match_fields=None,\n",
    "        update_geometry=\"NOT_UPDATE_GEOMETRY\"\n",
    "    )"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
